import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
Import the dataset
# Read the CSV file into a DataFrame and preview its first five rows.
data=pd.read_csv('indemnity.csv')
data.head(5)
| | age | gender | Body mass index | kids | chronic illness | area | bills |
|---|---|---|---|---|---|---|---|
| 0 | 19 | female | 27.900 | 0 | yes | southwest | 16884.92400 |
| 1 | 18 | male | 33.770 | 1 | no | southeast | 1725.55230 |
| 2 | 28 | male | 33.000 | 3 | no | southeast | 4449.46200 |
| 3 | 33 | male | 22.705 | 0 | no | northwest | 21984.47061 |
| 4 | 32 | male | 28.880 | 0 | no | northwest | 3866.85520 |
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()
| | age | Body mass index | kids | bills |
|---|---|---|---|---|
| count | 1338.000000 | 1338.000000 | 1338.000000 | 1338.000000 |
| mean | 39.207025 | 30.663397 | 1.094918 | 13270.422265 |
| std | 14.049960 | 6.098187 | 1.205493 | 12110.011237 |
| min | 18.000000 | 15.960000 | 0.000000 | 1121.873900 |
| 25% | 27.000000 | 26.296250 | 0.000000 | 4740.287150 |
| 50% | 39.000000 | 30.400000 | 1.000000 | 9382.033000 |
| 75% | 51.000000 | 34.693750 | 2.000000 | 16639.912515 |
| max | 64.000000 | 53.130000 | 5.000000 | 63770.428010 |
Check whether there are any null values in the dataset
# Count the null entries in each column.
data.isnull().sum()
age                0
gender             0
Body mass index    0
kids               0
chronic illness    0
area               0
bills              0
dtype: int64
# Gender distribution: percentage share as a pie chart (left panel) and
# absolute counts as a bar chart (right panel).
f, ax = plt.subplots(1, 2, figsize=(18, 8))
# explode takes one offset per slice; the two-element list matches the two
# gender categories present in this dataset.
data['gender'].value_counts().plot.pie(
    explode=[0, 0.1], autopct='%1.1f%%', ax=ax[0], shadow=True
)
ax[0].set_title('gender')
ax[0].set_ylabel('')  # drop the default y-label that pandas adds to pie charts
# The column must be passed as x=...: since seaborn 0.12 the only positional
# parameter of countplot is `data`, so sns.countplot('gender', data=data)
# raises "TypeError: got multiple values for argument 'data'". The keyword
# form also works on older seaborn releases.
sns.countplot(x='gender', data=data, ax=ax[1])
ax[1].set_title('gender')
plt.show()
# Count the non-null 'chronic illness' entries for each (gender, chronic illness) pair.
data.groupby(['gender','chronic illness'])['chronic illness'].count()
gender chronic illness
female no 547
yes 115
male no 517
yes 159
Name: chronic illness, dtype: int64
# Chronic-illness counts: overall yes/no totals (left panel) and the same
# counts split by gender (right panel).
f, ax = plt.subplots(1, 2, figsize=(18, 8))
data['chronic illness'].value_counts().plot.bar(ax=ax[0])
# NOTE(review): this panel plots overall counts with no gender split, so the
# "by gender" wording in the title looks misleading -- confirm intended text.
ax[0].set_title('chronic illness by gender')
# The column must be passed as x=...: since seaborn 0.12 the only positional
# parameter of countplot is `data`, so passing 'gender' positionally together
# with data=data raises a TypeError. The keyword form also works on older
# seaborn releases.
sns.countplot(x='gender', hue='chronic illness', data=data, ax=ax[1])
ax[1].set_title('number of chronic illness by gender')
plt.show()
# Bills per number of kids, coloured by gender. With y given, px.histogram
# aggregates y inside each x bin (sum by default per the plotly docs).
fig = px.histogram(
    data,
    x="kids",
    y="bills",
    color="gender",
)
fig.show()

# Bills per body-mass-index bin, coloured by chronic-illness status.
fig = px.histogram(
    data,
    x="Body mass index",
    y="bills",
    color="chronic illness",
)
fig.show()

# Joint density of age and body mass index, with a rug plot along the x
# margin and a histogram along the y margin.
fig = px.density_heatmap(
    data,
    x="age",
    y="Body mass index",
    marginal_x="rug",
    marginal_y="histogram",
)
fig.show()

# Body mass index aggregated per age bin, coloured by chronic-illness status.
# NOTE(review): the default aggregation adds up BMI values within each age
# bin; confirm a sum (rather than e.g. an average) is what was intended.
fig = px.histogram(
    data,
    x="age",
    y="Body mass index",
    color="chronic illness",
)
fig.show()
That's all.
Thank you for taking the time to read this notebook.